home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Collection of Tools & Utilities
/
Collection of Tools and Utilities.iso
/
edit
/
jwpsrc.zip
/
JIS.C
< prev
next >
Wrap
C/C++ Source or Header
|
1993-03-31
|
31KB
|
1,291 lines
/* Copyright (C) Ken R. Lunde, 1991-1992. All rights reserved. */
/* These routines originally came from Ken R. Lunde's JCONV.C program, */
/* version 2.3, August 12, 1992. Many thanks to Ken for his donation. */
/* The original code was reformatted (to fit the style of the rest */
/* of the program). Because EUC is used as the internal representa- */
/* tion, code corresponding to conversions not involving EUC is */
/* deleted. Also, the original file-based character I/O mechanism */
/* is changed. */
/* A modified version of the original documentation follows... */
/*
Program: jconv.c
Version: 2.3
Date: August 12, 1992
Author: Ken R. Lunde, Adobe Systems Incorporated
EMAIL: lunde@mv.us.adobe.com
MAIL : 1585 Charleston Road, P.O. Box 7900, Mountain View, CA 94039-7900
Type: A tool for converting the Japanese code of Japanese textfiles.
Code: ANSI C (portable)
PORTABILITY:
This source code was written so that it would be portable on C compilers which
conform to the ANSI C standard. It has been tested on a variety of compilers.
I used THINK C and GNU C as my development platforms. I left in the Macintosh-
specific lines of code so that it would be easier to enhance/debug this tool
later. For those of you who wish to use this tool on the Macintosh, simply
add the ANSI library to the THINK C project, and then build the application.
Be sure that THINK_C has been defined, though, as the conditional compilation
depends on it. You then have a double-clickable application, which when
launched, will greet you with a Macintosh-style interface.
DISTRIBUTION AND RESTRICTIONS ON USAGE:
1) Please give this source code away to your friends at no charge.
2) Please try to compile this source code on various platforms to check for
portability, and please report back to me with any results be they good or
bad. Suggestions are always welcome.
3) Only use this tool on a copy of a file -- do not use an original. This
is just common sense.
4) This source code or a compiled version may be bundled with commercial
software as long as the author is notified beforehand. The author's name
should also be mentioned in the credits.
5) Feel free to use any of the algorithms for your own work. Many of them are
being used in other tools I have written.
6) The most current version can be obtained by requesting a copy directly
from me.
DESCRIPTION:
1) Supports Shift-JIS, EUC, New-JIS, Old-JIS, and NEC-JIS for both input and
output.
2) Automatically detects infile's Japanese code (the ability to force an
input Japanese code is also supported through a command-line option).
3) The ability to convert Shift-JIS and EUC half-width katakana into full-
width equivalents. Note that half-width katakana includes other symbols
such as a Japanese period, Japanese comma, center dot, etc.
4) Supports conversion between the same code (i.e., EUC -> EUC, Shift-JIS ->
Shift-JIS, etc.). This is useful as a filter for converting half-width
katakana to their full-width equivalents.
5) If the infile does not contain any Japanese, then its contents are
echoed to the outfile. It will also try to repair the file as though
it had stripped escape characters (see #6 below).
6) The functionality of my other tool called repair-jis.c is included in
this tool by using the "-r[CODE]" option. This will recover stripped
escape characters, then convert the file to be in CODE format.
*/
#include "jwp.h"
#include <ctype.h>
#include <errno.h>
/* NOTE(review): presumably enables the half-width -> full-width katakana */
/* conversion described in the header comment; its use is outside this view. */
#define TOFULLSIZE TRUE

/* Capacity of the pushback stack UngetBuf[]. */
#define UNGETBUFSIZ 10

/* Character I/O. ReadChar() returns the most recently pushed-back */
/* character if there is one, otherwise it asks the f_charin callback. */
/* WriteChar() hands a character to the f_charout callback. */
/* UnreadChar() pushes a character back; it performs no bounds check, so */
/* callers must never have more than UNGETBUFSIZ characters pending. */
/* The expansion is parenthesized so it behaves as a single expression. */
#define ReadChar() ((stackp > 0) ? UngetBuf[--stackp] : f_charin())
#define WriteChar(x) f_charout(x)
#define UnreadChar(x) (UngetBuf[stackp++] = (x))

/* Extra format code, one past FF_NEC. */
/* NOTE(review): by name, "could be EUC or Shift-JIS" -- confirm in DetectCodeType. */
#define FF_EUCSJIS (FF_NEC + 1)

/* ASCII control characters. */
#define NUL 0
#define NL 10
#define FF 12
#define CR 13
#define ESC 27

/* Byte-range tests. Every use of the argument is parenthesized so that */
/* an expression argument (e.g. c & 0xFF) is compared as a whole. The */
/* argument is evaluated more than once, so it must have no side effects. */
/* SJIS1: 129..159 or 224..239 (Shift-JIS first-byte range). */
/* SJIS2: 64..252 (Shift-JIS second-byte range). */
/* HANKATA: 161..223 (half-width katakana range). */
/* ISEUC: 161..254 (EUC byte range). */
/* ISMARU / ISNIGORI: NOTE(review): by name, the half-width katakana codes */
/* that can take a maru / nigori mark -- confirm against HanToZen. */
#define SJIS1(A) (((A) >= 129 && (A) <= 159) || ((A) >= 224 && (A) <= 239))
#define SJIS2(A) ((A) >= 64 && (A) <= 252)
#define HANKATA(A) ((A) >= 161 && (A) <= 223)
#define ISEUC(A) ((A) >= 161 && (A) <= 254)
#define ISMARU(A) ((A) >= 202 && (A) <= 206)
#define ISNIGORI(A) (((A) >= 182 && (A) <= 196) || ((A) >= 202 && (A) <= 206))
/* Forward declarations for the conversion routines. Several of them are */
/* called from FileImport() and FileExport() below; their definitions are */
/* not in this part of the file. */
void StraightEcho (void);
/* NOTE(review): by name, half-width -> full-width (zenkaku) conversion of */
/* the byte pair *p1,*p2 -- confirm against the definition. */
void HanToZen(int *p1, int *p2, FILEFORMAT incode);
/* NOTE(review): by name, in-place conversion of a byte pair between */
/* Shift-JIS and JIS -- confirm against the definitions. */
void SJisToJis(int *p1, int *p2);
void JisToSJis(int *p1, int *p2);
void ShiftToEuc(FILEFORMAT incode);
/* ki / ko: NOTE(review): presumably the kanji-in / kanji-out escape */
/* sequence bodies (FileExport passes e.g. "$B" and "(J") -- confirm. */
void EucToSeven(FILEFORMAT incode, char *ki, char *ko);
void EucToEuc(FILEFORMAT incode);
void EucToShift(FILEFORMAT incode);
void SevenToEuc(void);
void JisRepair(FILEFORMAT outcode, char *ki, char *ko);
BOOL SkipESCSeq(int ch, int *TwoBytes);
FILEFORMAT DetectCodeType(void);
/* The I/O Routines */
/* Character source and sink callbacks. Both are installed by SetupIO() */
/* and reached through the ReadChar() / WriteChar() macros. */
static int (* f_charin)(void);
static void (* f_charout)(int);
/* Pushback stack: UnreadChar() stores at UngetBuf[stackp++] and ReadChar() */
/* pops UngetBuf[--stackp] before falling back to f_charin(). */
static int UngetBuf[UNGETBUFSIZ];
/* Number of pushed-back characters currently held in UngetBuf. */
static int stackp;
/*
 * SetupIO -- install the character-level I/O callbacks.
 *
 *   in   function that ReadChar() calls when no pushed-back character
 *        is pending
 *   out  function that WriteChar() calls with each character
 *
 * The pushback stack is also emptied, so anything pushed back before
 * this call is discarded.
 */
void SetupIO (int (* in)(void), void (* out)(int))
{
    /* Start with an empty pushback stack. */
    stackp = 0;

    f_charout = out;
    f_charin = in;
}
/*
 * OutPuts -- write a NUL-terminated string through WriteChar(), one
 * character at a time. The terminating NUL itself is not written.
 */
static void OutPuts (char *buf)
{
    char *cursor = buf;

    while (*cursor) {
        WriteChar(*cursor);
        cursor++;
    }
}
/*
 * FileImport -- convert the input stream to EUC.
 *
 *   incode  format of the incoming data; selects the converter to run:
 *             FF_SJIS                        -> ShiftToEuc
 *             FF_EUC                         -> EucToEuc
 *             FF_OLDJIS / FF_NEWJIS / FF_NEC -> SevenToEuc
 *             FF_UNKNOWN                     -> StraightEcho
 *           Any other value does nothing.
 */
void FileImport (FILEFORMAT incode)
{
    switch (incode) {
    case FF_SJIS:
        ShiftToEuc(incode);
        break;

    case FF_EUC:
        EucToEuc(incode);
        break;

    /* All three 7-bit JIS variants share one decoder. */
    case FF_OLDJIS:
    case FF_NEWJIS:
    case FF_NEC:
        SevenToEuc();
        break;

    case FF_UNKNOWN:
        StraightEcho();
        break;

    default:
        break;
    }
}
/*
 * FileExport -- convert the internal EUC stream to the output format.
 *
 *   outcode  desired output format:
 *              FF_SJIS             -> EucToShift
 *              FF_EUC / FF_UNKNOWN -> StraightEcho (the EucToEuc(FF_EUC)
 *                                     call is disabled in favour of a
 *                                     plain echo)
 *              FF_NEWJIS / FF_OLDJIS / FF_NEC
 *                                  -> EucToSeven with the ki / ko strings
 *                                     for that variant
 *            Any other value does nothing.
 */
void FileExport (FILEFORMAT outcode)
{
    /* NOTE(review): presumably the kanji-in / kanji-out escape sequence */
    /* bodies, with EucToSeven supplying the leading ESC -- confirm. */
    char *kanji_in;
    char *kanji_out;

    switch (outcode) {
    case FF_SJIS:
        EucToShift(FF_EUC);
        return;

    case FF_UNKNOWN:
    case FF_EUC:
        StraightEcho();
        return;

    case FF_NEWJIS:
        kanji_in = "$B";
        kanji_out = "(J";
        break;

    case FF_OLDJIS:
        kanji_in = "$@";
        kanji_out = "(J";
        break;

    case FF_NEC:
        kanji_in = "K";
        kanji_out = "H";
        break;

    default:
        return;
    }

    /* Only the three 7-bit JIS variants reach this point. */
    EucToSeven(FF_EUC, kanji_in, kanji_out);
}
#ifdef FOOBAR
void main(int argc,char **argv)
{
*out;
#ifndef THINK_C
int rc;
#endif
int tempincode,incode,doing = FALSE,forcedelesc = FALSE;
int makeoutfile = TRUE,outcode = FF_UNKNOWN,delesc = FALSE;
int repairjis = FALSE,TOFULLSIZE = FALSE,setincode = FALSE,docheck = FALSE;
char infilename[100],outfilename[100],extension[10],ki[10],ko[10],toolname[20];
#ifdef THINK_C
argc = ccommand(&argv);
#endif
strcpy(toolname,*argv);
while (--argc > 0 && (*++argv)[0] == '-')
switch (ToUpperCase(*++argv[0])) {
case 'C' :
docheck = TRUE;
break;
case 'F' :
TOFULLSIZE = TRUE;
break;
case 'H' :
dohelp(toolname);
break;
case 'I' :
setincode = TRUE;
doing = INPUT;
incode = getcode(extension,ToUpperCase(*++argv[0]),ki,ko,doing);
break;
case 'O' :
doing = OUTPUT;
outcode = getcode(extension,ToUpperCase(*++argv[0]),ki,ko,doing);
break;
case 'R' :
repairjis = TRUE;
doing = REPAIR;
outcode = getcode(extension,ToUpperCase(*++argv[0]),ki,ko,doing);
break;
case 'S' :
delesc = TRUE;
strcpy(extension,".rem");
if (ToUpperCase(*++argv[0]) == 'F')
forcedelesc = TRUE;
break;
case 'T' :
switch (ToUpperCase(*++argv[0])) {
case 'E' :
doeuctable();
break;
case 'J' :
case 'N' :
case 'O' :
dojistable();
break;
case 'S' :
dosjistable();
break;
default :
dojistable();
dosjistable();
doeuctable();
break;
}
exit(0);
break;
case 'V' :
break;
default :
fprintf(stderr,"Illegal option \"-%c\"! Try using the \"-h\" option for help.\n",*argv[0]);
fprintf(stderr,"Usage: %s [-options] [infile] [outfile]\nExiting...\n",toolname);
exit(1);
break;
}
if (repairjis && delesc) {
fprintf(stderr,"Error! Both \"-r\" and \"-s\" options cannot be selected! Exiting...\n");
exit